'''
Created on Nov 7, 2012

@author: peng
'''

# Roll-up script: reads tab-separated rows, and for every row adds its count
# (column 8) to three aggregate keys in which column 4, column 5, or both are
# replaced by the '--' placeholder.  The resulting totals are written out as
# "<key>\t<total>" lines.

# NOTE(review): the input path is empty in the original script, and open('')
# always fails; presumably the real path has to be filled in before running --
# confirm with the author.
INPUT_PATH = ''
OUTPUT_PATH = '/home/hadoop/test/all_all'

# Placeholder written into a key in place of a rolled-up column.
ROLLUP_MARK = '--'

# Minimum number of tab-separated columns per row; the count is column 8.
MIN_FIELDS = 8

# Aggregated totals, keyed by the tab-joined (partially masked) columns 1-7.
data_dict = {}


def _rollup_keys(values):
    """Return the three aggregate keys derived from one row's columns.

    ``values`` is the list of column strings of a row (at least 7 entries).
    The keys are, in order: column 5 masked, column 4 masked, and both
    masked.  If a column already holds the placeholder, two of the keys are
    identical; the caller still adds the count once per returned key, which
    matches the original accumulation logic.
    """
    head = values[0:3]
    tail = values[5:7]
    return [
        '\t'.join(head + [values[3], ROLLUP_MARK] + tail),
        '\t'.join(head + [ROLLUP_MARK, values[4]] + tail),
        '\t'.join(head + [ROLLUP_MARK, ROLLUP_MARK] + tail),
    ]


def aggregate_counts(lines, totals=None):
    """Accumulate the per-key totals for an iterable of tab-separated lines.

    Args:
        lines: iterable of strings, one row each (a trailing newline is
            allowed).  Lines containing only whitespace are skipped.
        totals: optional dict to accumulate into; a new dict is created when
            omitted.

    Returns:
        The dict mapping each aggregate key to the summed integer count.

    Raises:
        ValueError: if a non-blank row has fewer than 8 columns, or its
            count column is not an integer.
    """
    if totals is None:
        totals = {}

    for line_no, raw in enumerate(lines, 1):
        row = raw.replace('\n', '')
        if not row.strip():
            # Tolerate blank lines (e.g. a trailing empty line at EOF).
            continue

        values = row.split('\t')
        if len(values) < MIN_FIELDS:
            raise ValueError(
                'line %d: expected at least %d tab-separated fields, got %d'
                % (line_no, MIN_FIELDS, len(values)))

        count = int(values[7])
        for key in _rollup_keys(values):
            totals[key] = totals.get(key, 0) + count

    return totals


def main():
    """Aggregate INPUT_PATH into ``data_dict`` and write it to OUTPUT_PATH."""
    # 'with' guarantees both handles are closed even if a row is malformed
    # or a write fails part-way through.
    with open(INPUT_PATH) as source:
        aggregate_counts(source, data_dict)

    with open(OUTPUT_PATH, 'w') as sink:
        for key in data_dict:
            sink.write(key + '\t' + str(data_dict[key]) + '\n')


if __name__ == '__main__':
    main()
